import json
import re
import requests
from bs4 import BeautifulSoup


def zhiwang(id, timeout=10):
    """
    Scrape a CNKI wiki "hot word" page and return its structured content.

    :param id: page id used to build the URL http://wiki.cnki.com.cn/HotWord/<id>.htm
    :param timeout: seconds to wait for the HTTP response (new, defaults to 10;
        previously the request could block forever)
    :return: dict with keys 概述 (overview text), 相似词 / 相关词 (related-word
        lists, when present on the page), 期刊url (journal article URLs) and
        期刊word (per-article keyword lists)
    """
    headers = {
        "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "Accept-Encoding": "gzip, deflate", "Accept-Language": "zh-CN,zh;q=0.9",
        "Cache-Control": "max-age=0",
        "Connection": "keep-alive", "Host": "wiki.cnki.com.cn",
        "Upgrade-Insecure-Requests": "1",
        "Cookie": "UM_distinctid=173c16e519d73a-058630ec26edf5-3323765-144000-173c16e519e6d0; SID_wiki=018060; CNZZDATA3412177=cnzz_eid%3D2011079095-1596676096-null%26ntime%3D1597380140",
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.105 Safari/537.36"}
    url = "http://wiki.cnki.com.cn/HotWord/" + str(id) + ".htm"
    # timeout keeps a dead/slow server from hanging the caller indefinitely
    response = requests.get(url, headers=headers, timeout=timeout)
    soup = BeautifulSoup(response.text, 'html.parser')
    content = {}
    # Compile once instead of per-paragraph: strips newlines, spaces (ASCII,
    # NBSP and full-width), tabs and the full-width colon from the overview.
    noise = re.compile('[\n \r \t  ：\xa0\u3000]')
    for explain in soup.find_all(class_='explain'):
        # NOTE(review): each <p> overwrites the previous one, so only the last
        # paragraph is kept — preserved as-is; confirm this is intentional.
        for paragraph in explain.find_all('p'):
            content['概述'] = noise.sub('', paragraph.get_text())
    # Side panel: first .thesis box holds similar words, second holds related
    # words; zip silently drops keys when fewer boxes are present (original
    # behavior, preserved).
    list_content = []
    for pane in soup.find_all(class_='sidepan2'):
        for thesis in pane.find_all(class_='thesis'):
            list_content.append([
                anchor['title']
                for ul in thesis.find_all('ul')
                for li in ul.find_all('li')
                for anchor in li.find_all('a')
            ])
    content.update(dict(zip(['相似词', '相关词'], list_content)))
    # Journal essay list: collect article URLs (scheme-relative hrefs, hence
    # the 'http:' prefix) and, per <dl>, the keyword texts from its .infor row.
    qikan_list = []
    words = []
    for essay in soup.find_all(class_='essay_list'):
        for dl in essay.find_all('dl'):
            for dt in dl.find_all('dt'):
                for strong in dt.find_all('strong'):
                    for anchor in strong.find_all('a'):
                        qikan_list.append('http:' + anchor['href'])
            words.append([
                anchor.get_text()
                for infor in dl.find_all(class_='infor')
                for anchor in infor.find_all('a')
            ])
    content['期刊url'] = qikan_list
    content['期刊word'] = words
    return content


if __name__ == '__main__':
    # Demo run: fetch one hot-word page and pretty-print the result
    # (ensure_ascii=False keeps the Chinese text readable).
    result = zhiwang('3718950')
    print(json.dumps(result, ensure_ascii=False, indent=2))
